---
title: "Correlations and Effect size calculations and probability"
author: "A hEMEDAN"
date: "03/01/2021"
---
  
# Wrapper Function to calculate the cohen distance
cohensD <- function(data,group_var,groups,scores,ref_group){
  data <- as_tibble(data)
  output <- matrix(ncol=length(scores), nrow=length(groups))
  for(i in groups){
    for(j in scores){
      m1 <- mean(data.matrix(data[,j][which(data[group_var]==i), ]),na.rm = T)
      m2 <- mean(data.matrix(data[,j][which(data[group_var]==ref_group), ]),na.rm = T)
      s1 <- sd(data.matrix(data[,j][which(data[group_var]==i), ]),na.rm = T)
      s2 <- sd(data.matrix(data[,j][which(data[group_var]==ref_group), ]),na.rm = T)
      n1 <- length(as.matrix(data[,j][which(data[group_var]==i), ]))
      n2 <- length(as.matrix(data[,j][which(data[group_var]==ref_group), ]))
      # lx <- n1- 1
      # ly <- n2- 1
      md  <- m1-m2        ## mean difference (numerator)
      # csd <- lx * (s1^2) + ly * (s2^2)
      # csd <- csd/(lx + ly)
      # csd <- sqrt(csd)                     ## common sd computation
      # cd  <- md/csd; # Cohens D
      sdpool <- sqrt((s1^2+s2^2)/2)      ## pooled sd computation
      cd <- md/sdpool
      rownames(output) <- groups
      colnames(output) <- scores
      output[i,j] <- cd
    }
  }
  return(output)
}
# These functions compute the common-language effect size

# brute-force solution.
cles_brute.fnc <- function(runs = 1e5, variable, group, baseline, data) {
  # Select the observations for group 1
  x <- data[data[[group]] == baseline, variable]
  # Select the observations for group 2
  y <- data[data[[group]] != baseline, variable]
  # Matrix with difference between XY for all pairs (Guillaume Rousselet's suggestion)
  m <- outer(x,y,FUN="-")
  # Convert to booleans; count ties as half true.
  m <- ifelse(m==0, 0.5, m>0)
  # Return proportion of TRUEs
  qxly <- mean(m)
  return(qxly)
}

# McGraw & Wong's (1992) method.


cles_comp.fnc <- function(variable, group, baseline, data) {
  # Select the observations for group 1
  x <- data[data[[group]] == baseline, variable]
  
  # Select the observations for group 2
  y <- data[data[[group]] != baseline, variable]
  
  # Mean difference between x and y
  diff <- (mean(x) - mean(y))
  
  # Standard deviation of difference
  stdev <- sqrt(var(x) + var(y))
  
  # Probability derived from normal distribution
  # that random x is higher than random y -
  # or in other words, that diff is larger than 0.
  p.norm <- 1 - pnorm(0, diff, sd = stdev)
  
  # Return result
  return(p.norm)
}

# Both

cles.fnc <- function(variable, group, baseline, data, print = TRUE) {
  cles_brute <- cles_brute.fnc(runs = 0, variable = variable, group = group, baseline = baseline, data = data)
  cles_comp <- cles_comp.fnc(variable = variable, group = group, baseline = baseline, data = data)
  
  results <- list(algebraic = cles_comp,
                  brute = cles_brute)
  
  if(print == TRUE) {
    cat("Common-language effect size:",
        "\n",
        "\nThe probability that a random ", variable, " observation from group ", baseline, "\n",
        "is higher/larger than a random ", variable, " observation from the other group(s):",
        "\n",
        "\n    Algebraic method:   ", round(cles_comp, 2),
        "\n    Brute-force method: ", round(cles_brute, 2), 
        "\n",
        # "\n(brute-force method based on ", runs, " runs)\n",
        sep = "")
  }
  
  return(results)
}
#t test and wilcoxon test
library(matrixTests)
row_t_welch(case,control)

# initialize a list to store the p_values
p_values <- vector("list", nrow(case_control))

for(i in seq_along(1: nrow(case_control))){
  p_values[i] = wilcox.test(case_control_control,case_control_case, paired = TRUE, alternative = "two.sided", exact = FALSE)$p.value
  
}
# make it a data.frame
p_values = data.frame(p_values = sapply(p_values, c))            